# Boxplots of every feature in X, split by GC ----

# Attach each row's GC label to the feature columns, then rewrite the label as
# "<GC_NAME> (n = <rows in that GC>)" so the group size is shown on the axis.
# NOTE(review): bind_cols() pairs rows by position, so this assumes X and
# metadata are row-aligned -- confirm upstream.
plt_df <- X |>
  dplyr::bind_cols(dplyr::select(metadata, GC_NAME)) |>
  dplyr::group_by(GC_NAME) |>
  dplyr::mutate(GC_NAME = sprintf("%s (n = %d)", GC_NAME, dplyr::n())) |>
  dplyr::ungroup()

# Build one panel per column of X, visiting the column names in sorted order.
# The result is a list of ggplot objects named by the column each one shows.
plt_vars <- sort(colnames(X))
plt_ls <- lapply(
  stats::setNames(plt_vars, plt_vars),
  function(plt_var) {
    ggplot2::ggplot(
      plt_df,
      ggplot2::aes(
        # Order the GC groups along the x-axis by the plotted variable
        # (reorder() summarises each group with its default function).
        x = reorder(GC_NAME, !!rlang::sym(plt_var)),
        y = !!rlang::sym(plt_var),
        fill = GC_NAME
      )
    ) +
      ggplot2::geom_boxplot() +
      ggplot2::labs(x = "GC Name") +
      vthemes::theme_vmodern() +
      ggplot2::theme(
        # Vertical tick labels so the "<name> (n = ...)" strings stay legible.
        axis.text.x = ggplot2::element_text(
          angle = 90, hjust = 1, vjust = 0.5
        ),
        # The fill colour duplicates the x-axis, so the legend is dropped.
        legend.position = "none"
      )
  }
)

# Arrange the panels in a two-column grid and collect the axis titles.
plt <- patchwork::wrap_plots(plt_ls, ncol = 2) +
  patchwork::plot_layout(axis_titles = "collect")

# Render the combined figure with an explicit size and caption.
# NOTE(review): the caption carries its own inner single quotes; presumably
# subchunkify() expects the string pre-quoted -- confirm before changing.
subchunkify(
  plt,
  fig_height = 30,
  fig_width = 10,
  caption = "'Distribution of abundance values per feature and GC in (mean-imputed) training data.'"
)